library(tidyverse) ; library(reshape2) ; library(glue) ; library(plotly) ; library(dendextend)
library(RColorBrewer) ; library(viridis) ; require(gridExtra) ; library(GGally) ; library(ggpubr)
library(expss)
library(polycor)
library(foreach) ; library(doParallel)
library(knitr) ; library(kableExtra)
library(biomaRt)
library(clusterProfiler) ; library(ReactomePA) ; library(DOSE) ; library(org.Hs.eg.db)
library(WGCNA)
SFARI_colour_hue = function(r) {
pal = c('#FF7631','#FFB100','#E8E328','#8CC83F','#62CCA6','#59B9C9','#b3b3b3','#808080','gray','#d9d9d9')[r]
}
Load preprocessed dataset (preprocessing code in 01_data_preprocessing.Rmd) and clustering (pipeline in 05_WGCNA.Rmd)
# Gandal dataset
load('./../Data/preprocessed_data.RData')
datExpr = datExpr %>% data.frame
DE_info = DE_info %>% data.frame
# GO Neuronal annotations: regex 'neuron' in GO functional annotations and label the genes that make a match as neuronal
GO_annotations = read.csv('./../Data/genes_GO_annotations.csv')
GO_neuronal = GO_annotations %>% filter(grepl('neuron', go_term)) %>%
mutate('ID'=as.character(ensembl_gene_id)) %>%
dplyr::select(-ensembl_gene_id) %>% distinct(ID) %>%
mutate('Neuronal'=1)
# SFARI Genes
SFARI_genes = read_csv('./../../../SFARI/Data/SFARI_genes_01-03-2020_w_ensembl_IDs.csv')
SFARI_genes = SFARI_genes[!duplicated(SFARI_genes$ID) & !is.na(SFARI_genes$ID),]
# Old SFARI Genes
old_SFARI_genes = read_csv('./../../../SFARI/Data/SFARI_genes_08-29-2019_w_ensembl_IDs.csv')
old_SFARI_genes = old_SFARI_genes[!duplicated(old_SFARI_genes$ID) & !is.na(old_SFARI_genes$ID),]
# Clusterings
clusterings = read_csv('./../Data/clusters.csv')
# Update DE_info with SFARI and Neuronal information
genes_info = DE_info %>% mutate('ID'=rownames(.)) %>% left_join(SFARI_genes, by='ID') %>%
mutate(`gene-score`=ifelse(is.na(`gene-score`), 'Others', `gene-score`)) %>%
left_join(old_SFARI_genes %>% rename(`gene-score` = 'old-gene-score') %>%
dplyr::select(ID, `old-gene-score`), by = 'ID') %>%
mutate(`old-gene-score`=ifelse(is.na(`old-gene-score`), 'Others', `old-gene-score`)) %>%
left_join(GO_neuronal, by='ID') %>% left_join(clusterings, by='ID') %>%
mutate(Neuronal=ifelse(is.na(Neuronal), 0, Neuronal)) %>%
mutate(gene.score=ifelse(`gene-score`=='Others' & Neuronal==1, 'Neuronal', `gene-score`),
old.gene.score=ifelse(`old-gene-score`=='Others' & Neuronal==1,'Neuronal',`old-gene-score`),
significant=padj<0.05 & !is.na(padj))
# Add gene symbol
getinfo = c('ensembl_gene_id','external_gene_id')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl',
host='feb2014.archive.ensembl.org') ## Gencode v19
gene_names = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), values=genes_info$ID, mart=mart)
genes_info = genes_info %>% left_join(gene_names, by=c('ID'='ensembl_gene_id'))
clustering_selected = 'DynamicHybrid'
genes_info$Module = genes_info[,clustering_selected]
dataset = read.csv(paste0('./../Data/dataset_', clustering_selected, '.csv'))
dataset$Module = dataset[,clustering_selected]
dataset$gene.score = as.character(dataset$gene.score)
dataset$gene.score[dataset$gene.score=='None'] = 'Others'
rm(DE_info, GO_annotations, clusterings, getinfo, mart, dds)
plot_data = dataset %>% dplyr::select(Module, MTcor) %>% distinct %>%
mutate(alpha = ifelse(abs(MTcor)>0.9, 1, 0.3))
top_modules = plot_data %>% arrange(desc(MTcor)) %>% filter(abs(MTcor)>0.9) %>% pull(Module) %>% as.character
Selecting Modules with a Module-Diagnosis correlation magnitude larger than 0.9.
The 4 modules that fulfill this condition are #88AC00, #FE61CD, #00BADE, #D177FF
ggplotly(plot_data %>% ggplot(aes(reorder(Module, -MTcor), MTcor)) +
geom_bar(stat='identity', fill = plot_data$Module, alpha = plot_data$alpha) +
geom_hline(yintercept =c(0.9, -0.9), color = 'gray', linetype = 'dotted') +
xlab('Modules')+ ylab('Module-Diagnosis Correlation') + theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1)))
The modules consist mainly of points with high (absolute) values in PC2 (which we know is related to lfc), so this result is consistent with the high correlation between Module and Diagnosis, although some of the points with the highest PC2 values do not belong to these top modules
The genes belonging to the modules with the positive Module-Diagnosis correlation have positive LFC values and the genes belonging to the modules with the negative Module-Diagnosis correlation have negative values.
pca = datExpr %>% prcomp
plot_data = data.frame('ID'=rownames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2]) %>%
left_join(dataset, by='ID') %>%
left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
dplyr::select(ID, external_gene_id, PC1, PC2, Module, gene.score) %>%
mutate(ImportantModules = ifelse(Module %in% top_modules, as.character(Module), 'Others')) %>%
mutate(color = ifelse(ImportantModules=='Others','gray',ImportantModules),
alpha = ifelse(ImportantModules=='Others', 0.2, 0.4),
gene_id = paste0(ID, ' (', external_gene_id, ')')) %>%
apply_labels(ImportantModules = 'Top Modules')
cro(plot_data$ImportantModules)
|  #Total | |
|---|---|
|  Top Modules | |
| Â Â Â #00BADEÂ | 1476 |
| Â Â Â #88AC00Â | 900 |
| Â Â Â #D177FFÂ | 597 |
| Â Â Â #FE61CDÂ | 85 |
|    Others | 13074 |
|    #Total cases | 16132 |
ggplotly(plot_data %>% ggplot(aes(PC1, PC2, color=ImportantModules)) +
geom_point(alpha=plot_data$alpha, color=plot_data$color, aes(ID=gene_id)) + theme_minimal() +
xlab(paste0('PC1 (',round(100*summary(pca)$importance[2,1],2),'%)')) +
ylab(paste0('PC2 (',round(100*summary(pca)$importance[2,2],2),'%)')) +
ggtitle('Genes belonging to the Modules with the strongest relation to ASD'))
rm(pca)
List of top 20 SFARI Genes in top modules ordered by SFARI score and Gene Significance
list_top_SFARI_genes = function(table_data, module) {
t = table_data %>% filter(Module == module & `SFARI score` %in% c(1,2,3)) %>%
slice_head(n=20) %>% dplyr::select(-Module, -`Ensembl ID`)
return(t)
}
table_data = dataset %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
dplyr::select(ID, external_gene_id, GS, gene.score, Module) %>%
arrange(gene.score, desc(abs(GS))) %>%
dplyr::rename('Ensembl ID' = ID, 'Gene Symbol' = external_gene_id,
'SFARI score' = gene.score, 'Gene Significance' = GS)
kable(list_top_SFARI_genes(table_data, top_modules[1]),
caption=paste0('Top SFARI Genes for Module ', top_modules[1])) %>%
kable_styling(full_width = F)
| Gene Symbol | Gene Significance | SFARI score |
|---|---|---|
| TRIO | 0.6258926 | 1 |
| KDM6B | 0.3283297 | 1 |
| NR4A2 | 0.2443968 | 1 |
| CACNA1C | 0.2095202 | 1 |
| RELN | -0.0999774 | 1 |
| TET2 | 0.8173977 | 2 |
| KAT6A | 0.5987963 | 2 |
| INTS6 | 0.5494694 | 2 |
| NLGN1 | 0.3923348 | 2 |
| CPEB4 | 0.3675908 | 2 |
| PHF2 | 0.3657805 | 2 |
| FOXP2 | 0.2504688 | 2 |
| AGO4 | 0.2169331 | 2 |
| SEMA5A | 0.2152763 | 2 |
| CACNB2 | 0.1672467 | 2 |
| KDM1B | 0.8223813 | 3 |
| SERPINE1 | 0.7901630 | 3 |
| PLAUR | 0.7762049 | 3 |
| SNTG2 | 0.7351118 | 3 |
| RNF135 | 0.6754056 | 3 |
kable(list_top_SFARI_genes(table_data, top_modules[2]),
caption=paste0('Top SFARI Genes for Module ', top_modules[2])) %>%
kable_styling(full_width = F)
| Gene Symbol | Gene Significance | SFARI score |
|---|---|---|
| JARID2 | 0.5463074 | 2 |
| AVPR1A | 0.1144397 | 2 |
| THBS1 | 0.8071354 | 3 |
| DPYSL3 | 0.6507469 | 3 |
| PCDH11X | -0.0219558 | 3 |
kable(list_top_SFARI_genes(table_data, top_modules[3]),
caption=paste0('Top SFARI Genes for Module ', top_modules[3])) %>%
kable_styling(full_width = F)
| Gene Symbol | Gene Significance | SFARI score |
|---|---|---|
| SCN1A | -0.8001031 | 1 |
| SCN8A | -0.7886101 | 1 |
| KCNB1 | -0.7190325 | 1 |
| NCKAP1 | -0.6912015 | 1 |
| NRXN3 | -0.6712196 | 1 |
| TBR1 | -0.6557760 | 1 |
| BRAF | -0.5717387 | 1 |
| SCN2A | -0.5365881 | 1 |
| ANK2 | -0.4350014 | 1 |
| NBEA | -0.4139469 | 1 |
| KIAA2022 | -0.3993686 | 1 |
| BCL11A | -0.3854374 | 1 |
| CDKL5 | -0.3651548 | 1 |
| RORB | -0.1995688 | 1 |
| MEIS2 | -0.0223285 | 1 |
| HIVEP2 | 0.0187984 | 1 |
| NUAK1 | -0.8224543 | 2 |
| DLGAP1 | -0.7790358 | 2 |
| EFR3A | -0.7485311 | 2 |
| RBFOX1 | -0.7315240 | 2 |
kable(list_top_SFARI_genes(table_data, top_modules[4]),
caption=paste0('Top SFARI Genes for Module ', top_modules[4])) %>%
kable_styling(full_width = F)
| Gene Symbol | Gene Significance | SFARI score |
|---|---|---|
| SLC6A1 | -0.5297151 | 1 |
| GIGYF1 | -0.5275482 | 1 |
| SMARCC2 | -0.4317714 | 1 |
| DYRK1A | -0.2759465 | 1 |
| PAH | -0.6902687 | 2 |
| OTUD7A | -0.4714962 | 2 |
| TAF6 | -0.4144509 | 2 |
| CACNA1H | -0.4136128 | 2 |
| MYH10 | -0.3265092 | 2 |
| OXTR | -0.1685022 | 2 |
| RAB11FIP5 | -0.8337588 | 3 |
| UNC13A | -0.6675531 | 3 |
| EXOC3 | -0.5343061 | 3 |
| LRRC4 | -0.4747208 | 3 |
| ADK | -0.4679812 | 3 |
| C16orf13 | -0.4395837 | 3 |
| HSD11B1 | -0.3660044 | 3 |
| AZGP1 | -0.3616651 | 3 |
| MCM6 | -0.2693989 | 3 |
| FAM98C | -0.2505459 | 3 |
rm(table_data, list_top_SFARI_genes)
Since these modules have the strongest relation to autism, this pattern should be reflected in their model eigengenes, having two different behaviours for the samples corresponding to autism and the ones corresponding to control
In all cases, the Eigengenes separate the behaviour between autism and control samples very clearly (p-value < \(10^{-4}\)). Modules with positive Module-Diagnosis correlation have higher eigengenes in the ASD samples and Modules with a negative correlation, in the Control samples
plot_EGs = function(module){
plot_data = data.frame('ID' = rownames(MEs), 'MEs' = MEs[,paste0('ME', module)],
'Diagnosis' = datMeta$Diagnosis)
p = plot_data %>% ggplot(aes(Diagnosis, MEs, fill=Diagnosis)) +
geom_boxplot(outlier.colour='#cccccc', outlier.shape='o', outlier.size=3) +
ggtitle(paste0('Module ', module, ' (MTcor=',round(dataset$MTcor[dataset$Module == module][1],2),')')) +
stat_compare_means(method = 't.test', method.args = list(var.equal = FALSE), label = 'p.signif',
ref.group = 'ASD') +
ylab('Module Eigengenes') + theme_minimal() + theme(legend.position='none')
return(p)
}
# Calculate Module Eigengenes
ME_object = datExpr %>% t %>% moduleEigengenes(colors = genes_info$Module)
MEs = orderMEs(ME_object$eigengenes)
p1 = plot_EGs(top_modules[1])
p2 = plot_EGs(top_modules[2])
p3 = plot_EGs(top_modules[3])
p4 = plot_EGs(top_modules[4])
grid.arrange(p1, p2, p3, p4, nrow=2)
rm(plot_EGs, ME_object, MEs, p1, p2, p3, p4)
In the WGCNA pipeline, the most representative genes of each module are selected based on having a high module membership as well as a high (absolute) gene significance, so I’m going to do the same
SFARI Genes don’t seem to be more representative than the rest of the genes
create_plot = function(module){
plot_data = dataset %>% dplyr::select(ID, paste0('MM.',gsub('#','',module)), GS, gene.score) %>%
filter(dataset$Module==module)
colnames(plot_data)[2] = 'Module'
SFARI_colors = as.numeric(names(table(as.character(plot_data$gene.score)[plot_data$gene.score!='Others'])))
p = ggplotly(plot_data %>% mutate(gene.score = ifelse(gene.score =='Others', 'Not in SFARI', gene.score)) %>%
ggplot(aes(Module, GS, color=gene.score)) +
geom_point(alpha=0.5, aes(ID=ID)) + xlab('Module Membership') + ylab('Gene Significance') +
ggtitle(paste0('Module ', module, ' (MTcor = ',
round(dataset$MTcor[dataset$Module == module][1],2),')')) +
scale_color_manual(values=SFARI_colour_hue(r=c(SFARI_colors,7))) + theme_minimal())
return(p)
}
create_plot(top_modules[1])
create_plot(top_modules[2])
create_plot(top_modules[3])
create_plot(top_modules[4])
rm(create_plot)
Ordered by \(\frac{MM+|GS|}{2}\)
There aren’t that many SFARI genes in the top genes of the modules
create_table = function(module){
top_genes = dataset %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
dplyr::select(ID, external_gene_id, paste0('MM.',gsub('#','',module)), GS, gene.score) %>%
filter(dataset$Module==module) %>% dplyr::rename('MM' = paste0('MM.',gsub('#','',module))) %>%
mutate(Relevance = (MM+abs(GS))/2,
gene.score = ifelse(gene.score =='Others', 'Not in SFARI', gene.score)) %>%
arrange(by=-Relevance) %>% top_n(20) %>%
dplyr::rename('Gene Symbol' = external_gene_id, 'SFARI Score' = gene.score)
return(top_genes)
}
top_genes = list()
for(i in 1:length(top_modules)) top_genes[[i]] = create_table(top_modules[i])
kable(top_genes[[1]] %>% dplyr::select(-ID), caption=paste0('Top 20 genes for Module ', top_modules[1],
' (MTcor = ', round(dataset$MTcor[dataset$Module == top_modules[1]][1],2),')')) %>%
kable_styling(full_width = F)
| Gene Symbol | MM | GS | SFARI Score | Relevance |
|---|---|---|---|---|
| MCL1 | 0.8465338 | 0.9072989 | Not in SFARI | 0.8769163 |
| ITGA5 | 0.8339945 | 0.8593981 | Not in SFARI | 0.8466963 |
| PXN | 0.7985870 | 0.8684998 | Not in SFARI | 0.8335434 |
| SRGAP1 | 0.7369491 | 0.9218585 | Not in SFARI | 0.8294038 |
| CFLAR | 0.8089743 | 0.8164817 | Not in SFARI | 0.8127280 |
| SERPINE1 | 0.8255616 | 0.7901630 | 3 | 0.8078623 |
| ITPRIP | 0.7902462 | 0.8158919 | Not in SFARI | 0.8030690 |
| LATS2 | 0.7446791 | 0.8495642 | Not in SFARI | 0.7971216 |
| RREB1 | 0.7421619 | 0.8436186 | Not in SFARI | 0.7928902 |
| PPP1R15B | 0.7507284 | 0.8217099 | Not in SFARI | 0.7862192 |
| DUSP5 | 0.8034792 | 0.7635489 | Not in SFARI | 0.7835141 |
| IGF2BP2 | 0.7271384 | 0.8384871 | Not in SFARI | 0.7828127 |
| MYOF | 0.7301644 | 0.8331343 | Not in SFARI | 0.7816494 |
| PLEKHG1 | 0.7536775 | 0.8078314 | Not in SFARI | 0.7807545 |
| AFF4 | 0.7900222 | 0.7704519 | Not in SFARI | 0.7802371 |
| ELF1 | 0.7740647 | 0.7862097 | Not in SFARI | 0.7801372 |
| OLFML2B | 0.7032587 | 0.8525670 | Not in SFARI | 0.7779128 |
| ADAMTS15 | 0.7404845 | 0.8088659 | Not in SFARI | 0.7746752 |
| BTG1 | 0.7397583 | 0.8080613 | Not in SFARI | 0.7739098 |
| FCGRT | 0.6902009 | 0.8564067 | Not in SFARI | 0.7733038 |
kable(top_genes[[2]] %>% dplyr::select(-ID), caption=paste0('Top 20 genes for Module ', top_modules[2],
' (MTcor = ', round(dataset$MTcor[dataset$Module == top_modules[2]][1],2),')')) %>%
kable_styling(full_width = F)
| Gene Symbol | MM | GS | SFARI Score | Relevance |
|---|---|---|---|---|
| THBS1 | 0.7478563 | 0.8071354 | 3 | 0.7774959 |
| CEBPD | 0.8495617 | 0.6821147 | Not in SFARI | 0.7658382 |
| LTBR | 0.6575780 | 0.8157083 | Not in SFARI | 0.7366431 |
| PLEKHG2 | 0.6914450 | 0.7390905 | Not in SFARI | 0.7152677 |
| CD93 | 0.6232782 | 0.8021016 | Not in SFARI | 0.7126899 |
| CD163 | 0.6590678 | 0.7631685 | Not in SFARI | 0.7111182 |
| IL1R1 | 0.6673902 | 0.7454650 | Not in SFARI | 0.7064276 |
| INHBA | 0.7138406 | 0.6897204 | Not in SFARI | 0.7017805 |
| TBC1D2B | 0.6069216 | 0.7949069 | Not in SFARI | 0.7009143 |
| PCDHGA2 | 0.5934642 | 0.7934548 | Not in SFARI | 0.6934595 |
| LTBP1 | 0.6240597 | 0.7387364 | Not in SFARI | 0.6813981 |
| CSRNP1 | 0.7942489 | 0.5638992 | Not in SFARI | 0.6790741 |
| PRKX | 0.6817065 | 0.6573171 | Not in SFARI | 0.6695118 |
| ETV3 | 0.7184186 | 0.6094847 | Not in SFARI | 0.6639516 |
| IL4R | 0.7203342 | 0.6030154 | Not in SFARI | 0.6616748 |
| AKAP13 | 0.7316507 | 0.5798342 | Not in SFARI | 0.6557424 |
| MAP3K8 | 0.6118423 | 0.6920723 | Not in SFARI | 0.6519573 |
| BCL6 | 0.6920042 | 0.6059242 | Not in SFARI | 0.6489642 |
| BMF | 0.7099484 | 0.5787629 | Not in SFARI | 0.6443556 |
| CHSY1 | 0.6591621 | 0.6262673 | Not in SFARI | 0.6427147 |
kable(top_genes[[3]] %>% dplyr::select(-ID), caption=paste0('Top 20 genes for Module ', top_modules[3],
' (MTcor = ', round(dataset$MTcor[dataset$Module == top_modules[3]][1],2),')')) %>%
kable_styling(full_width = F)
| Gene Symbol | MM | GS | SFARI Score | Relevance |
|---|---|---|---|---|
| MAPK9 | 0.8936385 | -0.9396129 | Not in SFARI | 0.9166257 |
| TRIM37 | 0.8897530 | -0.9297290 | Not in SFARI | 0.9097410 |
| PREPL | 0.8974015 | -0.9038264 | Not in SFARI | 0.9006140 |
| NAP1L5 | 0.8663709 | -0.9041442 | Not in SFARI | 0.8852576 |
| TTBK2 | 0.8871227 | -0.8745765 | Not in SFARI | 0.8808496 |
| EIF5A2 | 0.8379288 | -0.9113247 | Not in SFARI | 0.8746267 |
| PRKCE | 0.8392500 | -0.9082418 | Not in SFARI | 0.8737459 |
| ATP6V1C1 | 0.8503040 | -0.8959641 | Not in SFARI | 0.8731340 |
| SCN8A | 0.9469981 | -0.7886101 | 1 | 0.8678041 |
| DIRAS1 | 0.8365571 | -0.8964601 | Not in SFARI | 0.8665086 |
| ENO2 | 0.8805819 | -0.8473660 | Not in SFARI | 0.8639740 |
| ATP6V1A | 0.8076082 | -0.9160321 | Not in SFARI | 0.8618202 |
| NAPB | 0.8935221 | -0.8296895 | Not in SFARI | 0.8616058 |
| KIF3A | 0.8715770 | -0.8454561 | Not in SFARI | 0.8585166 |
| RCAN2 | 0.7836787 | -0.9149672 | Not in SFARI | 0.8493229 |
| MAP7D2 | 0.8191900 | -0.8791508 | Not in SFARI | 0.8491704 |
| SCN1A | 0.8943166 | -0.8001031 | 1 | 0.8472099 |
| GABRA1 | 0.9132443 | -0.7756802 | Not in SFARI | 0.8444622 |
| SULT4A1 | 0.8785958 | -0.8102475 | Not in SFARI | 0.8444216 |
| SNAP25 | 0.8768766 | -0.8096627 | 3 | 0.8432696 |
kable(top_genes[[4]] %>% dplyr::select(-ID), caption=paste0('Top 20 genes for Module ', top_modules[4],
' (MTcor = ', round(dataset$MTcor[dataset$Module == top_modules[4]][1],2),')')) %>%
kable_styling(full_width = F)
| Gene Symbol | MM | GS | SFARI Score | Relevance |
|---|---|---|---|---|
| LIMK1 | 0.8636194 | -0.8621181 | Not in SFARI | 0.8628688 |
| RNF157 | 0.8706245 | -0.8484414 | Not in SFARI | 0.8595329 |
| MAP3K9 | 0.7999583 | -0.8868390 | Not in SFARI | 0.8433986 |
| PNKD | 0.8339040 | -0.8335298 | Not in SFARI | 0.8337169 |
| CPLX1 | 0.8379136 | -0.8266084 | Not in SFARI | 0.8322610 |
| KATNB1 | 0.8620121 | -0.7903608 | Not in SFARI | 0.8261864 |
| VAMP1 | 0.8444736 | -0.8017732 | Not in SFARI | 0.8231234 |
| UBE2M | 0.7881808 | -0.8499163 | Not in SFARI | 0.8190485 |
| SHD | 0.7752282 | -0.8627139 | Not in SFARI | 0.8189710 |
| LYNX1 | 0.8181968 | -0.8013245 | Not in SFARI | 0.8097607 |
| PTGES2 | 0.7882388 | -0.8301816 | Not in SFARI | 0.8092102 |
| RAB11FIP5 | 0.7788734 | -0.8337588 | 3 | 0.8063161 |
| TOX2 | 0.8018598 | -0.8096830 | Not in SFARI | 0.8057714 |
| LINC00087 | 0.8505122 | -0.7448316 | Not in SFARI | 0.7976719 |
| PITPNA | 0.6936979 | -0.8760322 | Not in SFARI | 0.7848651 |
| CORO2A | 0.7448224 | -0.8203961 | Not in SFARI | 0.7826093 |
| IQSEC3 | 0.7794723 | -0.7836849 | Not in SFARI | 0.7815786 |
| SSBP3 | 0.8154217 | -0.7458918 | Not in SFARI | 0.7806567 |
| EEF1A2 | 0.8279183 | -0.7293232 | Not in SFARI | 0.7786207 |
| RPH3A | 0.7211072 | -0.8320794 | Not in SFARI | 0.7765933 |
rm(create_table, i)
pca = datExpr %>% prcomp
ids = c()
for(tg in top_genes) ids = c(ids, tg$ID)
plot_data = data.frame('ID'=rownames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2]) %>%
left_join(dataset, by='ID') %>% dplyr::select(ID, PC1, PC2, Module, gene.score) %>%
mutate(color = ifelse(Module %in% top_modules, as.character(Module), 'gray')) %>%
mutate(alpha = ifelse(color %in% top_modules & ID %in% ids, 1, 0.1))
plot_data %>% ggplot(aes(PC1, PC2)) + geom_point(alpha=plot_data$alpha, color=plot_data$color) +
xlab(paste0('PC1 (',round(100*summary(pca)$importance[2,1],2),'%)')) +
ylab(paste0('PC2 (',round(100*summary(pca)$importance[2,2],2),'%)')) +
theme_minimal() + ggtitle('Most relevant genes for top Modules')
rm(ids, pca, tg, plot_data)
Level of expression by Diagnosis for top genes, ordered by relevance (defined above): There is a visible difference in level of expression between diagnosis groups in all of these genes
create_plot = function(i){
plot_data = datExpr[rownames(datExpr) %in% top_genes[[i]]$ID,] %>% mutate('ID' = rownames(.)) %>%
melt(id.vars='ID') %>% mutate(variable = gsub('X','',variable)) %>%
left_join(top_genes[[i]], by='ID') %>%
left_join(datMeta %>% dplyr::select(Dissected_Sample_ID, Diagnosis),
by = c('variable'='Dissected_Sample_ID')) %>% arrange(desc(Relevance))
p = ggplotly(plot_data %>% mutate(external_gene_id=factor(`Gene Symbol`,
levels=unique(plot_data$`Gene Symbol`), ordered=T)) %>%
ggplot(aes(`Gene Symbol`, value, fill=Diagnosis)) + geom_boxplot() + theme_minimal() +
ggtitle(paste0('Top Genes for Module ', top_modules[i], ' (MTcor = ',
round(dataset$MTcor[dataset$Module == top_modules[i]][1],2), ')')) +
xlab('') + ylab('Level of Expression') +
theme(axis.text.x = element_text(angle = 90, hjust = 1)))
return(p)
}
create_plot(1)
create_plot(2)
create_plot(3)
create_plot(4)
rm(create_plot)
Using the package clusterProfiler. Performing Gene Set Enrichment Analysis (GSEA) and Over Representation Analysis (ORA) using the following datasets:
Gene Ontology
Disease Ontology
Disease Gene Network
KEGG
REACTOME
file_name = './../Data/GSEA.RData'
if(file.exists(file_name)){
load(file_name)
} else {
##############################################################################
# PREPARE DATASET
# Create dataset with top modules membership and removing the genes without an assigned module
EA_dataset = data.frame('ensembl_gene_id' = genes_info$ID, module = genes_info$Module) %>%
filter(genes_info$Module!='gray')
# Assign Entrez Gene Id to each gene
getinfo = c('ensembl_gene_id','entrezgene')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl',
host='feb2014.archive.ensembl.org')
biomart_output = getBM(attributes=getinfo, filters=c('ensembl_gene_id'),
values=genes_info$ID[genes_info$Module!='gray'], mart=mart)
EA_dataset = biomart_output %>% dplyr::rename('ID' = ensembl_gene_id) %>%
left_join(dataset %>% dplyr::select(ID, contains('MM.')), by='ID')
##############################################################################
# PERFORM ENRICHMENT
# Following https://yulab-smu.github.io/clusterProfiler-book/chapter8.html
modules = dataset$Module[dataset$Module!='gray'] %>% as.character %>% table %>% names
nPerm = 1e5 # 100 times more than the default
enrichment_GO = list() # Gene Ontology
enrichment_DO = list() # Disease Ontology
enrichment_DGN = list() # Disease Gene Networks
enrichment_KEGG = list() # Kyoto Encyclopedia of Genes and Genomes
enrichment_Reactome = list() # Reactome: Pathway db
for(module in modules){
cat('\n')
cat(paste0('Module: ', which(modules == module), '/', length(modules)))
geneList = EA_dataset[,paste0('MM.',substring(module,2))]
names(geneList) = EA_dataset[,'entrezgene'] %>% as.character
geneList = sort(geneList, decreasing = TRUE)
enrichment_GO[[module]] = gseGO(geneList, OrgDb = org.Hs.eg.db, pAdjustMethod = 'bonferroni',
pvalueCutoff = 0.1, nPerm = nPerm, verbose = FALSE, seed = TRUE) %>%
data.frame
enrichment_DO[[module]] = gseDO(geneList, pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1,
nPerm = nPerm, verbose = FALSE, seed = TRUE) %>% data.frame
enrichment_DGN[[module]] = gseDGN(geneList, pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1,
nPerm = nPerm, verbose = FALSE, seed = TRUE) %>% data.frame
enrichment_KEGG[[module]] = gseKEGG(geneList, organism = 'human', pAdjustMethod = 'bonferroni',
pvalueCutoff = 0.1, nPerm = nPerm, verbose = FALSE, seed = TRUE) %>%
data.frame
enrichment_Reactome[[module]] = gsePathway(geneList, organism = 'human', pAdjustMethod = 'bonferroni',
pvalueCutoff = 0.1, nPerm = nPerm, verbose = F, seed = T) %>%
data.frame
# Temporal save, just in case SFARI Genes enrichment fails
save(enrichment_GO, enrichment_DO, enrichment_DGN, enrichment_KEGG, enrichment_Reactome, file=file_name)
}
##############################################################################
# PERFROM ENRICHMENT FOR SFARI GENES
# BUILD MAPPING BETWEEN GENES AND SFARI
# Build TERM2GENE: dataframe of 2 columns with term and gene
term2gene = biomart_output %>%
left_join(genes_info %>% dplyr::select(ID, `gene-score`),
by = c('ensembl_gene_id'='ID')) %>% dplyr::select(-ensembl_gene_id) %>%
mutate('SFARI' = ifelse(`gene-score` != 'Others','SFARI','Others'),
`gene-score` = ifelse(`gene-score` != 'Others',
paste0('SFARI Score ',`gene-score`), 'Others')) %>%
melt(id.vars = 'entrezgene') %>% dplyr::select(value, entrezgene) %>%
dplyr::rename('term' = value, 'gene' = entrezgene) %>% distinct
# PERFORM GSEA
enrichment_SFARI = list()
cat('\n\nGSEA OF SFARI GENES\n')
for(module in modules){
cat('\n')
cat(paste0('Module: ', which(modules == module), '/', length(modules)))
geneList = EA_dataset[,paste0('MM.',substring(module,2))]
names(geneList) = EA_dataset[,'entrezgene'] %>% as.character
geneList = sort(geneList, decreasing = TRUE)
enrichment_SFARI[[module]] = clusterProfiler::GSEA(geneList, pAdjustMethod = 'bonferroni', nPerm = nPerm,
TERM2GENE = term2gene, pvalueCutoff=1, maxGSSize=2e3,
verbose = FALSE, seed = TRUE) %>% data.frame
# Temporal save
save(enrichment_GO, enrichment_DO, enrichment_DGN, enrichment_KEGG, enrichment_Reactome,
enrichment_SFARI, file=file_name)
}
##############################################################################
# PERFROM ENRICHMENT FOR OLD SFARI GENES
# BUILD MAPPING BETWEEN GENES AND SFARI
# Build TERM2GENE: dataframe of 2 columns with term and gene
term2gene = biomart_output %>%
left_join(genes_info %>% dplyr::select(ID, `old-gene-score`),
by = c('ensembl_gene_id'='ID')) %>% dplyr::select(-ensembl_gene_id) %>%
mutate('SFARI' = ifelse(`old-gene-score` != 'Others','SFARI','Others'),
`old-gene-score` = ifelse(`old-gene-score` != 'Others',
paste0('SFARI Score ',`old-gene-score`), 'Others')) %>%
melt(id.vars = 'entrezgene') %>% dplyr::select(value, entrezgene) %>%
dplyr::rename('term' = value, 'gene' = entrezgene) %>% distinct
# PERFORM GSEA
enrichment_old_SFARI = list()
cat('\n\nGSEA OF OLD SFARI GENES\n')
for(module in modules){
cat('\n')
cat(paste0('Module: ', which(modules == module), '/', length(modules)))
geneList = EA_dataset[,paste0('MM.',substring(module,2))]
names(geneList) = EA_dataset[,'entrezgene'] %>% as.character
geneList = sort(geneList, decreasing = TRUE)
enrichment_old_SFARI[[module]] = clusterProfiler::GSEA(geneList, pAdjustMethod = 'bonferroni',
nPerm = nPerm, TERM2GENE = term2gene, pvalueCutoff=1,
maxGSSize=2e3, verbose = FALSE, seed = TRUE) %>%
data.frame
# Temporal save
save(enrichment_GO, enrichment_DO, enrichment_DGN, enrichment_KEGG, enrichment_Reactome,
enrichment_SFARI, enrichment_old_SFARI, file=file_name)
}
##############################################################################
# Save enrichment results
save(enrichment_GO, enrichment_DO, enrichment_DGN, enrichment_KEGG, enrichment_Reactome,
enrichment_SFARI, enrichment_old_SFARI, file=file_name)
rm(getinfo, mart, biomart_output, module, term2gene, geneList, EA_dataset, nPerm)
}
# Rename lists
GSEA_GO = enrichment_GO
GSEA_DGN = enrichment_DGN
GSEA_DO = enrichment_DO
GSEA_KEGG = enrichment_KEGG
GSEA_Reactome = enrichment_Reactome
GSEA_SFARI = enrichment_SFARI
GSEA_old_SFARI = enrichment_old_SFARI
file_name = './../Data/ORA.RData'
if(file.exists(file_name)){
load(file_name)
} else {
##############################################################################
# PREPARE DATASET
# Create dataset with top modules membership and removing the genes without an assigned module
EA_dataset = data.frame('ensembl_gene_id' = genes_info$ID, module = genes_info$Module) %>%
filter(genes_info$Module!='gray')
# Assign Entrez Gene Id to each gene
getinfo = c('ensembl_gene_id','entrezgene')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl',
host='feb2014.archive.ensembl.org')
biomart_output = getBM(attributes=getinfo, filters=c('ensembl_gene_id'),
values=genes_info$ID[genes_info$Module!='gray'], mart=mart)
EA_dataset = biomart_output %>% dplyr::rename('ID' = ensembl_gene_id) %>%
left_join(dataset %>% dplyr::select(ID, Module), by='ID')
##############################################################################
# PERFORM ENRICHMENT
# Following https://yulab-smu.github.io/clusterProfiler-book/chapter8.html
modules = dataset$Module[dataset$Module!='gray'] %>% as.character %>% table %>% names
universe = EA_dataset$entrezgene %>% as.character
enrichment_GO = list() # Gene Ontology
enrichment_DO = list() # Disease Ontology
enrichment_DGN = list() # Disease Gene Networks
enrichment_KEGG = list() # Kyoto Encyclopedia of Genes and Genomes
enrichment_Reactome = list() # Reactome: Pathway db
for(module in modules){
genes_in_module = EA_dataset$entrezgene[EA_dataset$Module == module]
enrichment_GO[[module]] = enrichGO(gene = genes_in_module, universe = universe, OrgDb = org.Hs.eg.db,
ont = 'All', pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1,
qvalueCutoff = 1) %>% data.frame
enrichment_DO[[module]] = enrichDO(gene = genes_in_module, universe = universe, qvalueCutoff = 1,
pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1) %>% data.frame
enrichment_DGN[[module]] = enrichDGN(gene = genes_in_module, universe = universe, qvalueCutoff = 1,
pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1) %>% data.frame
enrichment_KEGG[[module]] = enrichKEGG(gene = genes_in_module, universe = universe, qvalueCutoff = 1,
pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1) %>% data.frame
enrichment_Reactome[[module]] = enrichPathway(gene = genes_in_module, universe = universe, qvalueCutoff = 1,
pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1) %>%
data.frame
}
# Temporal save, just in case SFARI Genes enrichment fails
save(enrichment_GO, enrichment_DO, enrichment_DGN, enrichment_KEGG, enrichment_Reactome, file=file_name)
##############################################################################
# PERFROM ENRICHMENT FOR SFARI GENES
# BUILD MAPPING BETWEEN GENES AND SFARI
# Build TERM2GENE: dataframe of 2 columns with term and gene
term2gene = biomart_output %>%
left_join(genes_info %>% dplyr::select(ID, `gene-score`),
by = c('ensembl_gene_id'='ID')) %>% dplyr::select(-ensembl_gene_id) %>%
mutate('SFARI' = ifelse(`gene-score` != 'Others','SFARI','Others'),
`gene-score` = ifelse(`gene-score` != 'Others',
paste0('SFARI Score ',`gene-score`), 'Others')) %>%
melt(id.vars = 'entrezgene') %>% dplyr::select(value, entrezgene) %>%
dplyr::rename('term' = value, 'gene' = entrezgene) %>% distinct
# PERFORM GSEA
enrichment_SFARI = list()
for(module in modules){
genes_in_module = EA_dataset$entrezgene[EA_dataset$Module == module]
enrichment_SFARI[[module]] = enricher(gene = genes_in_module, universe = universe,
pAdjustMethod = 'bonferroni', TERM2GENE = term2gene,
pvalueCutoff = 1, qvalueCutoff = 1, maxGSSize = 50000) %>%
data.frame %>% dplyr::select(-geneID,-Description)
}
##############################################################################
# PERFROM ENRICHMENT FOR SFARI GENES
# BUILD MAPPING BETWEEN GENES AND SFARI
# Build TERM2GENE: dataframe of 2 columns with term and gene
term2gene = biomart_output %>%
left_join(genes_info %>% dplyr::select(ID, `old-gene-score`),
by = c('ensembl_gene_id'='ID')) %>% dplyr::select(-ensembl_gene_id) %>%
mutate('SFARI' = ifelse(`old-gene-score` != 'Others','SFARI','Others'),
`old-gene-score` = ifelse(`old-gene-score` != 'Others',
paste0('SFARI Score ',`old-gene-score`), 'Others')) %>%
melt(id.vars = 'entrezgene') %>% dplyr::select(value, entrezgene) %>%
dplyr::rename('term' = value, 'gene' = entrezgene) %>% distinct
# PERFORM GSEA
enrichment_old_SFARI = list()
for(module in modules){
genes_in_module = EA_dataset$entrezgene[EA_dataset$Module == module]
enrichment_old_SFARI[[module]] = enricher(gene = genes_in_module, universe = universe,
pAdjustMethod = 'bonferroni', TERM2GENE = term2gene,
pvalueCutoff = 1, qvalueCutoff = 1, maxGSSize = 5e4) %>%
data.frame %>% dplyr::select(-geneID,-Description)
}
##############################################################################
# Save enrichment results
save(enrichment_GO, enrichment_DO, enrichment_DGN, enrichment_KEGG, enrichment_Reactome,
enrichment_SFARI, enrichment_old_SFARI, file=file_name)
rm(getinfo, mart, biomart_output, gene, module, term2gene, genes_in_module, EA_dataset, universe, file_name)
}
# Rename lists
ORA_GO = enrichment_GO
ORA_DGN = enrichment_DGN
ORA_DO = enrichment_DO
ORA_KEGG = enrichment_KEGG
ORA_Reactome = enrichment_Reactome
ORA_SFARI = enrichment_SFARI
ORA_old_SFARI = enrichment_old_SFARI
rm(enrichment_GO, enrichment_DO, enrichment_DGN, enrichment_KEGG, enrichment_Reactome, enrichment_SFARI,
enrichment_old_SFARI)
Both GSEA and ORA are commonly used to study enrichment in sets of genes, but when using them for studying our modules both have shortcomings:
GSEA takes into consideration some ordering of the genes, in this case given by their Module Membership, which is correlated to the membership of genes to the module, but has two problems:
Being a continuous scale, it doesn’t separate by a threshold the genes that are truly in the cluster from the rest
The Module Membership metric is correlated to the real membership of the module, but this correlation is not perfect: a high MM doesn’t always mean the gene belongs to that module, for example, selecting a random module, in the plot below we can see the MM distribution of genes belonging to that module against the rest of the genes belonging to other modules and, although in general, genes belonging to that module have a higher distribution of MM, there is still a big overlap between the two groups, making MM a noisy metric for performing GSEA
modules = dataset$Module[dataset$Module!='gray'] %>% as.character %>% table %>% names
module = sample(modules,1)
plot_data = dataset %>% dplyr::select(Module, paste0('MM.',gsub('#','',module))) %>%
mutate(in_module = substring(Module,2) == gsub('#','',module), selected_module = module) %>%
mutate(alpha = ifelse(in_module, 0.8, 0.1))
colnames(plot_data)[2] = 'MM'
p = plot_data %>% ggplot(aes(selected_module, MM, color = in_module)) + geom_jitter(alpha = plot_data$alpha) +
xlab('') + ylab('Module Membership') + coord_flip() + theme_minimal() +
theme(legend.position = 'none')
ggExtra::ggMarginal(p, type = 'density', groupColour = TRUE, groupFill = TRUE, margins = 'x', size=1)
rm(modules, module, p, plot_data)
So perhaps it could be useful to use both methods together, since they seem to complement each other’s shortcomings very well, performing the enrichment using both methods and identifying the terms that are found to be enriched by both
Note: Since the enrichment in both methods is quite a stric restriction, we decide to relax the corrected p-value threshold (using Bonferroni correction) to 0.1.
compare_methods = function(GSEA_list, ORA_list){
for(top_module in top_modules){
cat(paste0(' \n \nEnrichments for Module ', top_module, ' (MTcor=',
round(dataset$MTcor[dataset$Module==top_module][1],2), '): \n \n'))
GSEA = GSEA_list[[top_module]]
ORA = ORA_list[[top_module]]
cat(paste0('GSEA has ', nrow(GSEA), ' enriched terms \n'))
cat(paste0('ORA has ', nrow(ORA), ' enriched terms \n'))
cat(paste0(sum(ORA$ID %in% GSEA$ID), ' terms are enriched in both methods \n \n'))
plot_data = GSEA %>% mutate(pval_GSEA = p.adjust) %>% dplyr::select(ID, Description, NES, pval_GSEA) %>%
inner_join(ORA %>% mutate(pval_ORA = p.adjust) %>%
dplyr::select(ID, pval_ORA, GeneRatio, qvalue), by = 'ID')
if(nrow(plot_data)>0){
print(plot_data %>% mutate(pval_mean = pval_ORA + pval_GSEA) %>%
arrange(pval_mean) %>% dplyr::select(-pval_mean) %>%
kable %>% kable_styling(full_width = F))
}
}
}
plot_results = function(GSEA_list, ORA_list){
l = htmltools::tagList()
for(i in 1:length(top_modules)){
GSEA = GSEA_list[[top_modules[i]]]
ORA = ORA_list[[top_modules[i]]]
plot_data = GSEA %>% mutate(pval_GSEA = p.adjust) %>% dplyr::select(ID, Description, NES, pval_GSEA) %>%
inner_join(ORA %>% mutate(pval_ORA = p.adjust) %>% dplyr::select(ID, pval_ORA), by = 'ID')
if(nrow(plot_data)>5){
min_val = min(min(plot_data$pval_GSEA), min(plot_data$pval_ORA))
max_val = max(max(max(plot_data$pval_GSEA), max(plot_data$pval_ORA)),0.05)
ggp = ggplotly(plot_data %>% ggplot(aes(pval_GSEA, pval_ORA, color = NES)) +
geom_point(aes(id = Description)) +
geom_vline(xintercept = 0.05, color = 'gray', linetype = 'dotted') +
geom_hline(yintercept = 0.05, color = 'gray', linetype = 'dotted') +
ggtitle(paste0('Enriched terms in common for Module ', top_modules[i])) +
scale_x_continuous(limits = c(min_val, max_val)) +
scale_y_continuous(limits = c(min_val, max_val)) +
xlab('Corrected p-value for GSEA') + ylab('Corrected p-value for ORA') +
scale_colour_viridis(direction = -1) + theme_minimal() + coord_fixed())
l[[i]] = ggp
}
}
return(l)
}
compare_methods(GSEA_KEGG, ORA_KEGG)
Enrichments for Module #88AC00 (MTcor=0.96):
GSEA has 57 enriched terms
ORA has 5 enriched terms
5 terms are enriched in both methods
| ID | Description | NES | pval_GSEA | pval_ORA | GeneRatio | qvalue |
|---|---|---|---|---|---|---|
| hsa05034 | Alcoholism | 2.517619 | 0.0050526 | 0.0000000 | 43/406 | 0.0000000 |
| hsa05202 | Transcriptional misregulation in cancer | 2.538826 | 0.0050674 | 0.0000011 | 32/406 | 0.0000003 |
| hsa05322 | Systemic lupus erythematosus | 3.095129 | 0.0052397 | 0.0000000 | 40/406 | 0.0000000 |
| hsa05203 | Viral carcinogenesis | 2.122212 | 0.0049958 | 0.0039555 | 28/406 | 0.0007636 |
| hsa05131 | Shigellosis | 1.729790 | 0.0195785 | 0.0033899 | 32/406 | 0.0007636 |
Enrichments for Module #FE61CD (MTcor=0.92):
GSEA has 71 enriched terms
ORA has 6 enriched terms
6 terms are enriched in both methods
| ID | Description | NES | pval_GSEA | pval_ORA | GeneRatio | qvalue |
|---|---|---|---|---|---|---|
| hsa04064 | NF-kappa B signaling pathway | 2.613587 | 0.0053061 | 0.0086394 | 6/51 | 0.0071893 |
| hsa04668 | TNF signaling pathway | 2.315547 | 0.0052400 | 0.0208392 | 6/51 | 0.0073165 |
| hsa04380 | Osteoclast differentiation | 2.381074 | 0.0052017 | 0.0363957 | 6/51 | 0.0073165 |
| hsa04010 | MAPK signaling pathway | 1.692075 | 0.0093986 | 0.0388377 | 9/51 | 0.0073165 |
| hsa04060 | Cytokine-cytokine receptor interaction | 2.428932 | 0.0049869 | 0.0439614 | 7/51 | 0.0073165 |
| hsa04350 | TGF-beta signaling pathway | 1.909197 | 0.0265772 | 0.0832312 | 5/51 | 0.0115434 |
Enrichments for Module #00BADE (MTcor=-0.9):
GSEA has 44 enriched terms
ORA has 14 enriched terms
12 terms are enriched in both methods
| ID | Description | NES | pval_GSEA | pval_ORA | GeneRatio | qvalue |
|---|---|---|---|---|---|---|
| hsa04020 | Calcium signaling pathway | 1.997146 | 0.0043946 | 0.0001522 | 37/543 | 0.0000607 |
| hsa04728 | Dopaminergic synapse | 2.224843 | 0.0046546 | 0.0002936 | 28/543 | 0.0000780 |
| hsa04070 | Phosphatidylinositol signaling system | 1.960017 | 0.0048200 | 0.0013863 | 22/543 | 0.0002764 |
| hsa05033 | Nicotine addiction | 2.313572 | 0.0053252 | 0.0043829 | 12/543 | 0.0006990 |
| hsa04724 | Glutamatergic synapse | 2.150640 | 0.0047448 | 0.0056285 | 23/543 | 0.0007481 |
| hsa04720 | Long-term potentiation | 2.247426 | 0.0050307 | 0.0189579 | 16/543 | 0.0015512 |
| hsa04750 | Inflammatory mediator regulation of TRP channels | 1.853197 | 0.0192749 | 0.0194522 | 20/543 | 0.0015512 |
| hsa04024 | cAMP signaling pathway | 1.675747 | 0.0437914 | 0.0156488 | 33/543 | 0.0015512 |
| hsa04713 | Circadian entrainment | 2.198473 | 0.0048187 | 0.0604770 | 19/543 | 0.0043843 |
| hsa04360 | Axon guidance | 1.681080 | 0.0663946 | 0.0081992 | 32/543 | 0.0009341 |
| hsa04970 | Salivary secretion | 2.273165 | 0.0049516 | 0.0967544 | 16/543 | 0.0056301 |
| hsa04929 | GnRH secretion | 1.942345 | 0.0354635 | 0.0988421 | 14/543 | 0.0056301 |
Enrichments for Module #D177FF (MTcor=-0.91):
GSEA has 37 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods
compare_methods(GSEA_Reactome, ORA_Reactome)
Enrichments for Module #88AC00 (MTcor=0.96):
GSEA has 190 enriched terms
ORA has 85 enriched terms
81 terms are enriched in both methods
| ID | Description | NES | pval_GSEA | pval_ORA | GeneRatio | qvalue |
|---|---|---|---|---|---|---|
| R-HSA-194315 | Signaling by Rho GTPases | 1.919584 | 0.0189553 | 0.0000000 | 63/532 | 0.0000000 |
| R-HSA-2262752 | Cellular responses to stress | 1.594957 | 0.0190742 | 0.0000000 | 63/532 | 0.0000000 |
| R-HSA-195258 | RHO GTPase Effectors | 1.982037 | 0.0196685 | 0.0000000 | 51/532 | 0.0000000 |
| R-HSA-3247509 | Chromatin modifying enzymes | 2.430526 | 0.0200306 | 0.0000000 | 48/532 | 0.0000000 |
| R-HSA-4839726 | Chromatin organization | 2.430526 | 0.0200306 | 0.0000000 | 48/532 | 0.0000000 |
| R-HSA-9006931 | Signaling by Nuclear Receptors | 2.363239 | 0.0201838 | 0.0000000 | 52/532 | 0.0000000 |
| R-HSA-157118 | Signaling by NOTCH | 2.159970 | 0.0203944 | 0.0000000 | 45/532 | 0.0000000 |
| R-HSA-8878171 | Transcriptional regulation by RUNX1 | 2.283519 | 0.0204689 | 0.0000000 | 45/532 | 0.0000000 |
| R-HSA-201681 | TCF dependent signaling in response to WNT | 1.864157 | 0.0204833 | 0.0000000 | 46/532 | 0.0000000 |
| R-HSA-8939211 | ESR-mediated signaling | 2.547076 | 0.0205527 | 0.0000000 | 51/532 | 0.0000000 |
| R-HSA-2559583 | Cellular Senescence | 2.441615 | 0.0208620 | 0.0000000 | 52/532 | 0.0000000 |
| R-HSA-9018519 | Estrogen-dependent gene expression | 2.946515 | 0.0213853 | 0.0000000 | 46/532 | 0.0000000 |
| R-HSA-212165 | Epigenetic regulation of gene expression | 2.617183 | 0.0213853 | 0.0000000 | 43/532 | 0.0000000 |
| R-HSA-68875 | Mitotic Prophase | 2.696103 | 0.0213966 | 0.0000000 | 43/532 | 0.0000000 |
| R-HSA-3214847 | HATs acetylate histones | 2.660070 | 0.0214073 | 0.0000000 | 40/532 | 0.0000000 |
| R-HSA-211000 | Gene Silencing by RNA | 2.572281 | 0.0214858 | 0.0000000 | 43/532 | 0.0000000 |
| R-HSA-1474165 | Reproduction | 2.988842 | 0.0215897 | 0.0000000 | 42/532 | 0.0000000 |
| R-HSA-8939236 | RUNX1 regulates transcription of genes involved in differentiation of HSCs | 2.534161 | 0.0215897 | 0.0000000 | 41/532 | 0.0000000 |
| R-HSA-2559580 | Oxidative Stress Induced Senescence | 2.609232 | 0.0216809 | 0.0000000 | 43/532 | 0.0000000 |
| R-HSA-1500620 | Meiosis | 3.058186 | 0.0218291 | 0.0000000 | 41/532 | 0.0000000 |
| R-HSA-5617472 | Activation of anterior HOX genes in hindbrain development during early embryogenesis | 2.955994 | 0.0218291 | 0.0000000 | 40/532 | 0.0000000 |
| R-HSA-5619507 | Activation of HOX genes during differentiation | 2.955994 | 0.0218291 | 0.0000000 | 40/532 | 0.0000000 |
| R-HSA-73864 | RNA Polymerase I Transcription | 2.904259 | 0.0218444 | 0.0000000 | 40/532 | 0.0000000 |
| R-HSA-5250941 | Negative epigenetic regulation of rRNA expression | 2.880393 | 0.0218473 | 0.0000000 | 39/532 | 0.0000000 |
| R-HSA-2559582 | Senescence-Associated Secretory Phenotype (SASP) | 2.984685 | 0.0218473 | 0.0000000 | 45/532 | 0.0000000 |
| R-HSA-1912422 | Pre-NOTCH Expression and Processing | 3.130297 | 0.0218552 | 0.0000000 | 42/532 | 0.0000000 |
| R-HSA-73854 | RNA Polymerase I Promoter Clearance | 2.879809 | 0.0218552 | 0.0000000 | 40/532 | 0.0000000 |
| R-HSA-5578749 | Transcriptional regulation by small RNAs | 2.838955 | 0.0218824 | 0.0000000 | 41/532 | 0.0000000 |
| R-HSA-73886 | Chromosome Maintenance | 2.480300 | 0.0218824 | 0.0000006 | 25/532 | 0.0000000 |
| R-HSA-427413 | NoRC negatively regulates rRNA expression | 2.939042 | 0.0219032 | 0.0000000 | 39/532 | 0.0000000 |
| R-HSA-5250913 | Positive epigenetic regulation of rRNA expression | 2.854283 | 0.0219032 | 0.0000000 | 40/532 | 0.0000000 |
| R-HSA-5693607 | Processing of DNA double-strand break ends | 2.351151 | 0.0219457 | 0.0000283 | 22/532 | 0.0000004 |
| R-HSA-977225 | Amyloid fiber formation | 3.046027 | 0.0220555 | 0.0000000 | 41/532 | 0.0000000 |
| R-HSA-3214815 | HDACs deacetylate histones | 3.111249 | 0.0220661 | 0.0000000 | 38/532 | 0.0000000 |
| R-HSA-1912408 | Pre-NOTCH Transcription and Translation | 3.290943 | 0.0220701 | 0.0000000 | 40/532 | 0.0000000 |
| R-HSA-5625740 | RHO GTPases activate PKNs | 3.055960 | 0.0220701 | 0.0000000 | 40/532 | 0.0000000 |
| R-HSA-8936459 | RUNX1 regulates genes involved in megakaryocyte differentiation and platelet function | 3.189940 | 0.0220701 | 0.0000000 | 39/532 | 0.0000000 |
| R-HSA-73772 | RNA Polymerase I Promoter Escape | 3.138252 | 0.0221301 | 0.0000000 | 39/532 | 0.0000000 |
| R-HSA-5250924 | B-WICH complex positively regulates rRNA expression | 3.018731 | 0.0221301 | 0.0000000 | 39/532 | 0.0000000 |
| R-HSA-73884 | Base Excision Repair | 2.464025 | 0.0221301 | 0.0000004 | 23/532 | 0.0000000 |
| R-HSA-69473 | G2/M DNA damage checkpoint | 2.263027 | 0.0220555 | 0.0000768 | 21/532 | 0.0000010 |
| R-HSA-201722 | Formation of the beta-catenin:TCF transactivating complex | 3.012896 | 0.0221422 | 0.0000000 | 39/532 | 0.0000000 |
| R-HSA-912446 | Meiotic recombination | 3.170221 | 0.0223296 | 0.0000000 | 39/532 | 0.0000000 |
| R-HSA-3214858 | RMTs methylate histone arginines | 2.894053 | 0.0224035 | 0.0000000 | 30/532 | 0.0000000 |
| R-HSA-5693606 | DNA Double Strand Break Response | 2.432692 | 0.0224035 | 0.0000002 | 22/532 | 0.0000000 |
| R-HSA-157579 | Telomere Maintenance | 2.702816 | 0.0224370 | 0.0000000 | 23/532 | 0.0000000 |
| R-HSA-5693565 | Recruitment and ATM-mediated phosphorylation of repair and signaling proteins at DNA double strand breaks | 2.442701 | 0.0224370 | 0.0000001 | 22/532 | 0.0000000 |
| R-HSA-2559586 | DNA Damage/Telomere Stress Induced Senescence | 2.883011 | 0.0224487 | 0.0000000 | 26/532 | 0.0000000 |
| R-HSA-1221632 | Meiotic synapsis | 3.005301 | 0.0224683 | 0.0000000 | 25/532 | 0.0000000 |
| R-HSA-427389 | ERCC6 (CSB) and EHMT2 (G9a) positively regulate rRNA expression | 3.200840 | 0.0224804 | 0.0000000 | 39/532 | 0.0000000 |
| R-HSA-3214841 | PKMTs methylate histone lysines | 2.731314 | 0.0225039 | 0.0000000 | 31/532 | 0.0000000 |
| R-HSA-212300 | PRC2 methylates histones and DNA | 3.260787 | 0.0225278 | 0.0000000 | 41/532 | 0.0000000 |
| R-HSA-606279 | Deposition of new CENPA-containing nucleosomes at the centromere | 2.921820 | 0.0225278 | 0.0000000 | 25/532 | 0.0000000 |
| R-HSA-774815 | Nucleosome assembly | 2.921820 | 0.0225278 | 0.0000000 | 25/532 | 0.0000000 |
| R-HSA-2299718 | Condensation of Prophase Chromosomes | 3.297031 | 0.0225522 | 0.0000000 | 38/532 | 0.0000000 |
| R-HSA-5693571 | Nonhomologous End-Joining (NHEJ) | 2.374196 | 0.0225522 | 0.0000000 | 23/532 | 0.0000000 |
| R-HSA-5625886 | Activated PKN1 stimulates transcription of AR (androgen receptor) regulated genes KLK2 and KLK3 | 3.399894 | 0.0226605 | 0.0000000 | 39/532 | 0.0000000 |
| R-HSA-427359 | SIRT1 negatively regulates rRNA expression | 3.292509 | 0.0226625 | 0.0000000 | 38/532 | 0.0000000 |
| R-HSA-5334118 | DNA methylation | 3.370673 | 0.0227099 | 0.0000000 | 38/532 | 0.0000000 |
| R-HSA-73728 | RNA Polymerase I Promoter Opening | 3.354429 | 0.0227099 | 0.0000000 | 38/532 | 0.0000000 |
| R-HSA-73929 | Base-Excision Repair, AP Site Formation | 2.809426 | 0.0227575 | 0.0000000 | 23/532 | 0.0000000 |
| R-HSA-110328 | Recognition and association of DNA glycosylase with site containing an affected pyrimidine | 2.834817 | 0.0228244 | 0.0000000 | 23/532 | 0.0000000 |
| R-HSA-110329 | Cleavage of the damaged pyrimidine | 2.834817 | 0.0228244 | 0.0000000 | 23/532 | 0.0000000 |
| R-HSA-73928 | Depyrimidination | 2.834817 | 0.0228244 | 0.0000000 | 23/532 | 0.0000000 |
| R-HSA-3214842 | HDMs demethylate histones | 3.048686 | 0.0229185 | 0.0000000 | 30/532 | 0.0000000 |
| R-HSA-110330 | Recognition and association of DNA glycosylase with site containing an affected purine | 3.004381 | 0.0229523 | 0.0000000 | 23/532 | 0.0000000 |
| R-HSA-110331 | Cleavage of the damaged purine | 3.004381 | 0.0229523 | 0.0000000 | 23/532 | 0.0000000 |
| R-HSA-73927 | Depurination | 3.004381 | 0.0229523 | 0.0000000 | 23/532 | 0.0000000 |
| R-HSA-4551638 | SUMOylation of chromatin organization proteins | 2.007283 | 0.0224804 | 0.0005282 | 17/532 | 0.0000065 |
| R-HSA-5693567 | HDR through Homologous Recombination (HRR) or Single Strand Annealing (SSA) | 2.211131 | 0.0214858 | 0.0015531 | 23/532 | 0.0000188 |
| R-HSA-171306 | Packaging Of Telomere Ends | 3.083562 | 0.0230685 | 0.0000000 | 23/532 | 0.0000000 |
| R-HSA-1266695 | Interleukin-7 signaling | 2.267728 | 0.0232992 | 0.0000215 | 13/532 | 0.0000003 |
| R-HSA-5693538 | Homology Directed Repair | 2.142475 | 0.0214296 | 0.0035437 | 23/532 | 0.0000423 |
| R-HSA-449147 | Signaling by Interleukins | 1.990212 | 0.0191247 | 0.0070893 | 46/532 | 0.0000836 |
| R-HSA-5693532 | DNA Double-Strand Break Repair | 1.929327 | 0.0210565 | 0.0098601 | 25/532 | 0.0001133 |
| R-HSA-1474244 | Extracellular matrix organization | 2.421389 | 0.0200306 | 0.0348540 | 33/532 | 0.0003905 |
| R-HSA-68886 | M Phase | 1.594499 | 0.0577722 | 0.0002165 | 48/532 | 0.0000027 |
| R-HSA-3108232 | SUMO E3 ligases SUMOylate target proteins | 2.127288 | 0.0209162 | 0.0384890 | 25/532 | 0.0004259 |
| R-HSA-6785807 | Interleukin-4 and Interleukin-13 signaling | 2.614762 | 0.0220709 | 0.0582255 | 16/532 | 0.0006366 |
| R-HSA-1474228 | Degradation of the extracellular matrix | 2.035932 | 0.0216559 | 0.0616831 | 19/532 | 0.0006664 |
| R-HSA-2990846 | SUMOylation | 2.050574 | 0.0208620 | 0.0682249 | 25/532 | 0.0007284 |
Enrichments for Module #FE61CD (MTcor=0.92):
GSEA has 206 enriched terms
ORA has 3 enriched terms
3 terms are enriched in both methods
| ID | Description | NES | pval_GSEA | pval_ORA | GeneRatio | qvalue |
|---|---|---|---|---|---|---|
| R-HSA-6785807 | Interleukin-4 and Interleukin-13 signaling | 3.000070 | 0.0220913 | 0.0049350 | 6/56 | 0.0048301 |
| R-HSA-6783783 | Interleukin-10 signaling | 2.788374 | 0.0237253 | 0.0143897 | 4/56 | 0.0070420 |
| R-HSA-449147 | Signaling by Interleukins | 2.241698 | 0.0187936 | 0.0399762 | 10/56 | 0.0130424 |
Enrichments for Module #00BADE (MTcor=-0.9):
GSEA has 81 enriched terms
ORA has 14 enriched terms
12 terms are enriched in both methods
| ID | Description | NES | pval_GSEA | pval_ORA | GeneRatio | qvalue |
|---|---|---|---|---|---|---|
| R-HSA-112316 | Neuronal System | 2.716357 | 0.0164711 | 0.0000000 | 83/813 | 0.0000000 |
| R-HSA-112315 | Transmission across Chemical Synapses | 2.633929 | 0.0175987 | 0.0002076 | 48/813 | 0.0000471 |
| R-HSA-112314 | Neurotransmitter receptors and postsynaptic signal transmission | 2.564667 | 0.0183316 | 0.0000147 | 42/813 | 0.0000063 |
| R-HSA-1296071 | Potassium Channels | 2.258075 | 0.0201397 | 0.0000208 | 27/813 | 0.0000063 |
| R-HSA-983712 | Ion channel transport | 1.834103 | 0.0185571 | 0.0027016 | 35/813 | 0.0003500 |
| R-HSA-1296072 | Voltage gated Potassium channels | 2.507780 | 0.0219822 | 0.0011173 | 15/813 | 0.0001688 |
| R-HSA-442755 | Activation of NMDA receptors and postsynaptic events | 2.464782 | 0.0203097 | 0.0158104 | 21/813 | 0.0017920 |
| R-HSA-180024 | DARPP-32 events | 2.563404 | 0.0227878 | 0.0378210 | 10/813 | 0.0034295 |
| R-HSA-442982 | Ras activation upon Ca2+ influx through NMDA receptor | 2.357164 | 0.0231261 | 0.0489730 | 9/813 | 0.0040370 |
| R-HSA-438064 | Post NMDA receptor activation events | 2.399184 | 0.0206225 | 0.0775438 | 18/813 | 0.0054087 |
| R-HSA-438066 | Unblocking of NMDA receptors, glutamate binding and activation | 2.404319 | 0.0230288 | 0.0761934 | 9/813 | 0.0054087 |
| R-HSA-6794362 | Protein-protein interactions at synapses | 2.328002 | 0.0201942 | 0.0912816 | 20/813 | 0.0059122 |
Enrichments for Module #D177FF (MTcor=-0.91):
GSEA has 88 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods
Plots of the results when there are more than 5 terms in common between methods:
plot_results(GSEA_Reactome, ORA_Reactome)
compare_methods(GSEA_GO, ORA_GO)
Enrichments for Module #88AC00 (MTcor=0.96):
GSEA has 291 enriched terms
ORA has 77 enriched terms
42 terms are enriched in both methods
| ID | Description | NES | pval_GSEA | pval_ORA | GeneRatio | qvalue |
|---|---|---|---|---|---|---|
| GO:0042060 | wound healing | 2.194157 | 0.0819107 | 0.0000005 | 63/802 | 0.0000000 |
| GO:0001525 | angiogenesis | 2.387887 | 0.0828851 | 0.0003043 | 53/802 | 0.0000062 |
| GO:0009617 | response to bacterium | 2.347802 | 0.0830221 | 0.0002033 | 53/802 | 0.0000042 |
| GO:0048514 | blood vessel morphogenesis | 2.487801 | 0.0809534 | 0.0026436 | 57/802 | 0.0000506 |
| GO:1903706 | regulation of hemopoiesis | 2.167797 | 0.0829843 | 0.0009943 | 51/802 | 0.0000194 |
| GO:0030099 | myeloid cell differentiation | 2.368430 | 0.0842945 | 0.0000089 | 51/802 | 0.0000002 |
| GO:0040029 | regulation of gene expression, epigenetic | 2.006127 | 0.0868845 | 0.0000000 | 49/802 | 0.0000000 |
| GO:0071103 | DNA conformation change | 2.004744 | 0.0879365 | 0.0000031 | 42/802 | 0.0000001 |
| GO:0016458 | gene silencing | 1.961397 | 0.0887714 | 0.0000000 | 47/802 | 0.0000000 |
| GO:0045637 | regulation of myeloid cell differentiation | 2.452262 | 0.0891518 | 0.0000028 | 38/802 | 0.0000001 |
| GO:0071824 | protein-DNA complex subunit organization | 2.158352 | 0.0892023 | 0.0000001 | 41/802 | 0.0000000 |
| GO:0065004 | protein-DNA complex assembly | 2.186143 | 0.0904222 | 0.0000000 | 41/802 | 0.0000000 |
| GO:0031047 | gene silencing by RNA | 2.110681 | 0.0910882 | 0.0000000 | 39/802 | 0.0000000 |
| GO:0006323 | DNA packaging | 2.369197 | 0.0913482 | 0.0000000 | 41/802 | 0.0000000 |
| GO:0006333 | chromatin assembly or disassembly | 2.416547 | 0.0918712 | 0.0000000 | 42/802 | 0.0000000 |
| GO:0032200 | telomere organization | 2.049108 | 0.0919843 | 0.0000015 | 32/802 | 0.0000000 |
| GO:0035194 | posttranscriptional gene silencing by RNA | 2.132668 | 0.0920512 | 0.0000000 | 37/802 | 0.0000000 |
| GO:0016441 | posttranscriptional gene silencing | 2.120941 | 0.0920570 | 0.0000000 | 37/802 | 0.0000000 |
| GO:0035195 | gene silencing by miRNA | 2.165531 | 0.0922509 | 0.0000000 | 37/802 | 0.0000000 |
| GO:0034728 | nucleosome organization | 2.506418 | 0.0927991 | 0.0000000 | 41/802 | 0.0000000 |
| GO:0031497 | chromatin assembly | 2.493018 | 0.0933673 | 0.0000000 | 41/802 | 0.0000000 |
| GO:0060968 | regulation of gene silencing | 2.289665 | 0.0936134 | 0.0000000 | 38/802 | 0.0000000 |
| GO:0006334 | nucleosome assembly | 2.640251 | 0.0942389 | 0.0000000 | 41/802 | 0.0000000 |
| GO:0051291 | protein heterooligomerization | 2.285785 | 0.0942389 | 0.0000000 | 33/802 | 0.0000000 |
| GO:0060964 | regulation of gene silencing by miRNA | 2.387373 | 0.0946385 | 0.0000000 | 35/802 | 0.0000000 |
| GO:0060147 | regulation of posttranscriptional gene silencing | 2.372310 | 0.0946739 | 0.0000000 | 35/802 | 0.0000000 |
| GO:0060966 | regulation of gene silencing by RNA | 2.372310 | 0.0946739 | 0.0000000 | 35/802 | 0.0000000 |
| GO:0045814 | negative regulation of gene expression, epigenetic | 2.372625 | 0.0960856 | 0.0000000 | 36/802 | 0.0000000 |
| GO:0030219 | megakaryocyte differentiation | 2.707518 | 0.0966436 | 0.0000000 | 32/802 | 0.0000000 |
| GO:0006342 | chromatin silencing | 2.446469 | 0.0978334 | 0.0000000 | 34/802 | 0.0000000 |
| GO:0045652 | regulation of megakaryocyte differentiation | 2.699879 | 0.0980326 | 0.0000000 | 31/802 | 0.0000000 |
| GO:0043044 | ATP-dependent chromatin remodeling | 2.030480 | 0.0980062 | 0.0004975 | 18/802 | 0.0000099 |
| GO:0034508 | centromere complex assembly | 2.299804 | 0.0997654 | 0.0000022 | 18/802 | 0.0000001 |
| GO:0042742 | defense response to bacterium | 2.069047 | 0.0942892 | 0.0078448 | 22/802 | 0.0001443 |
| GO:0030198 | extracellular matrix organization | 2.397407 | 0.0867228 | 0.0168886 | 37/802 | 0.0002934 |
| GO:0007596 | blood coagulation | 1.967537 | 0.0870794 | 0.0204330 | 36/802 | 0.0003485 |
| GO:0050817 | coagulation | 1.952453 | 0.0871132 | 0.0241498 | 36/802 | 0.0003975 |
| GO:0060326 | cell chemotaxis | 2.253859 | 0.0897562 | 0.0236786 | 29/802 | 0.0003967 |
| GO:0007599 | hemostasis | 1.924338 | 0.0869507 | 0.0284816 | 36/802 | 0.0004529 |
| GO:0045638 | negative regulation of myeloid cell differentiation | 2.266552 | 0.0973869 | 0.0491900 | 16/802 | 0.0007692 |
| GO:0043062 | extracellular structure organization | 2.332806 | 0.0852855 | 0.0774516 | 39/802 | 0.0011534 |
| GO:0002237 | response to molecule of bacterial origin | 2.343583 | 0.0877074 | 0.0973397 | 33/802 | 0.0014269 |
Enrichments for Module #FE61CD (MTcor=0.92):
GSEA has 377 enriched terms
ORA has 15 enriched terms
9 terms are enriched in both methods
| ID | Description | NES | pval_GSEA | pval_ORA | GeneRatio | qvalue |
|---|---|---|---|---|---|---|
| GO:0002521 | leukocyte differentiation | 2.321392 | 0.0813809 | 0.0001616 | 14/79 | 0.0001289 |
| GO:0002460 | adaptive immune response based on somatic recombination of immune receptors built from immunoglobulin superfamily domains | 2.576067 | 0.0889123 | 0.0044774 | 9/79 | 0.0015918 |
| GO:0030098 | lymphocyte differentiation | 2.316303 | 0.0857160 | 0.0079510 | 10/79 | 0.0015918 |
| GO:0002250 | adaptive immune response | 2.484124 | 0.0850924 | 0.0145064 | 10/79 | 0.0016530 |
| GO:0001818 | negative regulation of cytokine production | 1.954026 | 0.0874165 | 0.0133227 | 9/79 | 0.0016530 |
| GO:0042110 | T cell activation | 2.462199 | 0.0822601 | 0.0229797 | 11/79 | 0.0022912 |
| GO:0030217 | T cell differentiation | 2.382511 | 0.0890469 | 0.0354006 | 8/79 | 0.0031374 |
| GO:0030099 | myeloid cell differentiation | 2.393929 | 0.0829359 | 0.0836380 | 10/79 | 0.0060528 |
| GO:0046631 | alpha-beta T cell activation | 2.606247 | 0.0945970 | 0.0825956 | 6/79 | 0.0060528 |
Enrichments for Module #00BADE (MTcor=-0.9):
GSEA has 145 enriched terms
ORA has 96 enriched terms
29 terms are enriched in both methods
| ID | Description | NES | pval_GSEA | pval_ORA | GeneRatio | qvalue |
|---|---|---|---|---|---|---|
| GO:0034762 | regulation of transmembrane transport | 2.141873 | 0.0696237 | 0.0000000 | 91/1239 | 0.0000000 |
| GO:0034765 | regulation of ion transmembrane transport | 2.262229 | 0.0712931 | 0.0000000 | 82/1239 | 0.0000000 |
| GO:0015672 | monovalent inorganic cation transport | 2.235542 | 0.0713551 | 0.0000018 | 76/1239 | 0.0000006 |
| GO:0050808 | synapse organization | 2.192044 | 0.0720889 | 0.0001134 | 68/1239 | 0.0000118 |
| GO:0099177 | regulation of trans-synaptic signaling | 2.427900 | 0.0721720 | 0.0000732 | 68/1239 | 0.0000086 |
| GO:0050804 | modulation of chemical synaptic transmission | 2.428109 | 0.0722069 | 0.0000655 | 68/1239 | 0.0000086 |
| GO:0042391 | regulation of membrane potential | 2.143565 | 0.0732511 | 0.0009078 | 60/1239 | 0.0000709 |
| GO:0023061 | signal release | 2.198170 | 0.0721085 | 0.0025859 | 64/1239 | 0.0001616 |
| GO:1904062 | regulation of cation transmembrane transport | 2.232521 | 0.0751080 | 0.0003572 | 54/1239 | 0.0000335 |
| GO:0050890 | cognition | 2.031060 | 0.0759452 | 0.0010708 | 50/1239 | 0.0000772 |
| GO:0007611 | learning or memory | 2.183693 | 0.0774993 | 0.0007774 | 46/1239 | 0.0000662 |
| GO:0006813 | potassium ion transport | 2.203148 | 0.0810243 | 0.0000027 | 42/1239 | 0.0000006 |
| GO:0099504 | synaptic vesicle cycle | 2.691746 | 0.0818072 | 0.0021165 | 35/1239 | 0.0001417 |
| GO:0032409 | regulation of transporter activity | 2.388352 | 0.0774611 | 0.0066624 | 44/1239 | 0.0003903 |
| GO:0071804 | cellular potassium ion transport | 2.177835 | 0.0846153 | 0.0000656 | 32/1239 | 0.0000086 |
| GO:0071805 | potassium ion transmembrane transport | 2.177835 | 0.0846153 | 0.0000656 | 32/1239 | 0.0000086 |
| GO:0022898 | regulation of transmembrane transporter activity | 2.419973 | 0.0780026 | 0.0108402 | 42/1239 | 0.0005977 |
| GO:0032412 | regulation of ion transmembrane transporter activity | 2.453577 | 0.0782320 | 0.0114878 | 41/1239 | 0.0005982 |
| GO:2001257 | regulation of cation channel activity | 2.556723 | 0.0822670 | 0.0155089 | 32/1239 | 0.0007185 |
| GO:0035637 | multicellular organismal signaling | 2.111649 | 0.0804880 | 0.0256310 | 35/1239 | 0.0010445 |
| GO:0036465 | synaptic vesicle recycling | 2.537675 | 0.0936917 | 0.0160975 | 16/1239 | 0.0007185 |
| GO:0048667 | cell morphogenesis involved in neuron differentiation | 2.057200 | 0.0696770 | 0.0417899 | 71/1239 | 0.0015744 |
| GO:0006836 | neurotransmitter transport | 2.495251 | 0.0779514 | 0.0713878 | 40/1239 | 0.0021584 |
| GO:0048488 | synaptic vesicle endocytosis | 2.336862 | 0.0959053 | 0.0543081 | 13/1239 | 0.0018180 |
| GO:0140238 | presynaptic endocytosis | 2.336862 | 0.0959053 | 0.0543081 | 13/1239 | 0.0018180 |
| GO:0060078 | regulation of postsynaptic membrane potential | 2.264422 | 0.0847285 | 0.0695810 | 26/1239 | 0.0021584 |
| GO:0010959 | regulation of metal ion transport | 1.675501 | 0.0739326 | 0.0895323 | 52/1239 | 0.0024682 |
| GO:2000310 | regulation of NMDA receptor activity | 2.641243 | 0.0971384 | 0.0792075 | 12/1239 | 0.0023200 |
| GO:0007416 | synapse assembly | 2.071926 | 0.0813752 | 0.0952305 | 32/1239 | 0.0025503 |
Enrichments for Module #D177FF (MTcor=-0.91):
GSEA has 33 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods
Plots of the results when there are more than 5 terms in common between methods:
plot_results(GSEA_GO, ORA_GO)
compare_methods(GSEA_DO, ORA_DO)
Enrichments for Module #88AC00 (MTcor=0.96):
GSEA has 209 enriched terms
ORA has 17 enriched terms
14 terms are enriched in both methods
| ID | Description | NES | pval_GSEA | pval_ORA | GeneRatio | qvalue |
|---|---|---|---|---|---|---|
| DOID:3856 | male reproductive organ cancer | 2.105333 | 0.0112846 | 0.0007390 | 51/441 | 0.0003123 |
| DOID:10283 | prostate cancer | 2.160427 | 0.0113159 | 0.0008951 | 50/441 | 0.0003123 |
| DOID:289 | endometriosis | 2.249293 | 0.0132301 | 0.0020708 | 17/441 | 0.0004817 |
| DOID:552 | pneumonia | 2.404571 | 0.0131145 | 0.0084400 | 17/441 | 0.0009816 |
| DOID:3388 | periodontal disease | 2.292131 | 0.0128863 | 0.0197256 | 19/441 | 0.0019664 |
| DOID:170 | endocrine gland cancer | 2.183772 | 0.0110390 | 0.0236796 | 53/441 | 0.0020211 |
| DOID:10124 | corneal disease | 2.034677 | 0.0407054 | 0.0061292 | 13/441 | 0.0008554 |
| DOID:3905 | lung carcinoma | 2.070213 | 0.0110390 | 0.0473066 | 52/441 | 0.0030011 |
| DOID:824 | periodontitis | 2.331939 | 0.0130071 | 0.0460881 | 17/441 | 0.0030011 |
| DOID:120 | female reproductive organ cancer | 2.082089 | 0.0112242 | 0.0669509 | 47/441 | 0.0035939 |
| DOID:1115 | sarcoma | 2.332428 | 0.0122961 | 0.0812727 | 26/441 | 0.0037756 |
| DOID:1793 | pancreatic cancer | 2.196253 | 0.0116994 | 0.0865672 | 36/441 | 0.0037756 |
| DOID:3070 | malignant glioma | 1.780635 | 0.0243180 | 0.0771751 | 28/441 | 0.0037756 |
| DOID:7148 | rheumatoid arthritis | 2.484654 | 0.0110882 | 0.0938745 | 50/441 | 0.0038534 |
Enrichments for Module #FE61CD (MTcor=0.92):
GSEA has 274 enriched terms
ORA has 4 enriched terms
3 terms are enriched in both methods
| ID | Description | NES | pval_GSEA | pval_ORA | GeneRatio | qvalue |
|---|---|---|---|---|---|---|
| DOID:114 | heart disease | 2.353917 | 0.0108995 | 0.0224552 | 13/56 | 0.0070826 |
| DOID:7148 | rheumatoid arthritis | 2.661544 | 0.0108551 | 0.0294449 | 13/56 | 0.0070826 |
| DOID:2237 | hepatitis | 2.313647 | 0.0111976 | 0.0742715 | 11/56 | 0.0133989 |
Enrichments for Module #00BADE (MTcor=-0.9):
GSEA has 93 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods
Enrichments for Module #D177FF (MTcor=-0.91):
GSEA has 87 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods
compare_methods(GSEA_DGN, ORA_DGN)
Enrichments for Module #88AC00 (MTcor=0.96):
GSEA has 268 enriched terms
ORA has 16 enriched terms
14 terms are enriched in both methods
| ID | Description | NES | pval_GSEA | pval_ORA | GeneRatio | qvalue |
|---|---|---|---|---|---|---|
| umls:C0036421 | Systemic Scleroderma | 2.242510 | 0.0481977 | 0.0001969 | 57/771 | 0.0000283 |
| umls:C1519670 | Tumor Angiogenesis | 2.082911 | 0.0493983 | 0.0003151 | 49/771 | 0.0000388 |
| umls:C0206659 | Embryonal Carcinoma | 2.121301 | 0.0548885 | 0.0001798 | 26/771 | 0.0000283 |
| umls:C0278504 | Non-small cell lung cancer stage I | 2.568079 | 0.0581732 | 0.0000001 | 21/771 | 0.0000000 |
| umls:C0238288 | Muscular Dystrophy, Facioscapulohumeral | 2.323388 | 0.0582387 | 0.0001129 | 17/771 | 0.0000243 |
| umls:C0595936 | Aqueous Humor Disorders | 2.459832 | 0.0585251 | 0.0015440 | 15/771 | 0.0001664 |
| umls:C2986658 | Diffuse Intrinsic Pontine Glioma | 2.671135 | 0.0614463 | 0.0000000 | 13/771 | 0.0000000 |
| umls:C0004623 | Bacterial Infections | 2.438379 | 0.0533027 | 0.0142350 | 28/771 | 0.0013639 |
| umls:C0153676 | Secondary malignant neoplasm of lung | 2.244884 | 0.0476762 | 0.0270225 | 55/771 | 0.0021254 |
| umls:C0003486 | Aortic Aneurysm | 2.343632 | 0.0537051 | 0.0271133 | 26/771 | 0.0021254 |
| umls:C3714514 | Infection | 2.386859 | 0.0480033 | 0.0412099 | 52/771 | 0.0029613 |
| umls:C0003864 | Arthritis | 2.212887 | 0.0484349 | 0.0797317 | 48/771 | 0.0049109 |
| umls:C0555198 | Malignant Glioma | 1.801436 | 0.0495028 | 0.0893191 | 42/771 | 0.0050175 |
| umls:C0220641 | Lip and Oral Cavity Carcinoma | 2.113410 | 0.0497893 | 0.0930999 | 41/771 | 0.0050175 |
Enrichments for Module #FE61CD (MTcor=0.92):
GSEA has 457 enriched terms
ORA has 33 enriched terms
29 terms are enriched in both methods
| ID | Description | NES | pval_GSEA | pval_ORA | GeneRatio | qvalue |
|---|---|---|---|---|---|---|
| umls:C0003864 | Arthritis | 2.398904 | 0.0475144 | 0.0003388 | 14/76 | 0.0000596 |
| umls:C0151744 | Myocardial Ischemia | 2.243656 | 0.0477202 | 0.0015810 | 13/76 | 0.0001853 |
| umls:C0243026 | Sepsis | 2.087043 | 0.0472867 | 0.0033051 | 13/76 | 0.0002785 |
| umls:C0036421 | Systemic Scleroderma | 2.517820 | 0.0472266 | 0.0035647 | 13/76 | 0.0002785 |
| umls:C3495559 | Juvenile arthritis | 2.629878 | 0.0502213 | 0.0011176 | 11/76 | 0.0001572 |
| umls:C0018133 | Graft-vs-Host Disease | 2.263717 | 0.0514152 | 0.0001992 | 11/76 | 0.0000467 |
| umls:C0024314 | Lymphoproliferative Disorders | 2.249801 | 0.0532379 | 0.0000998 | 10/76 | 0.0000351 |
| umls:C0333186 | Restenosis | 2.458536 | 0.0532379 | 0.0000998 | 10/76 | 0.0000351 |
| umls:C0524909 | Hepatitis B, Chronic | 2.126012 | 0.0526478 | 0.0025167 | 9/76 | 0.0002528 |
| umls:C0042373 | Vascular Diseases | 2.439313 | 0.0492032 | 0.0063444 | 11/76 | 0.0004056 |
| umls:C0035126 | Reperfusion Injury | 2.222380 | 0.0503175 | 0.0082315 | 10/76 | 0.0004453 |
| umls:C0001175 | Acquired Immunodeficiency Syndrome | 2.064782 | 0.0555519 | 0.0056693 | 7/76 | 0.0003987 |
| umls:C0022876 | Premature Obstetric Labor | 2.620135 | 0.0533069 | 0.0106074 | 8/76 | 0.0005328 |
| umls:C0020517 | Hypersensitivity | 2.420816 | 0.0549194 | 0.0122384 | 7/76 | 0.0005738 |
| umls:C0034362 | Q Fever | 2.446408 | 0.0622051 | 0.0074419 | 4/76 | 0.0004361 |
| umls:C0266929 | Chronic Periodontitis | 2.475921 | 0.0547074 | 0.0169237 | 7/76 | 0.0007439 |
| umls:C0948089 | Acute Coronary Syndrome | 2.528965 | 0.0510830 | 0.0222338 | 9/76 | 0.0008672 |
| umls:C0019829 | Hodgkin Disease | 2.378508 | 0.0494691 | 0.0246618 | 10/76 | 0.0008672 |
| umls:C0020445 | Hypercholesterolemia, Familial | 2.266099 | 0.0544058 | 0.0230091 | 7/76 | 0.0008672 |
| umls:C0011644 | Scleroderma | 2.318639 | 0.0543920 | 0.0241806 | 7/76 | 0.0008672 |
| umls:C0006274 | Bronchiolitis, Viral | 2.713964 | 0.0542718 | 0.0266719 | 7/76 | 0.0008932 |
| umls:C0019158 | Hepatitis | 1.942743 | 0.0523316 | 0.0361350 | 8/76 | 0.0011466 |
| umls:C0031099 | Periodontitis | 2.574251 | 0.0523107 | 0.0374987 | 8/76 | 0.0011466 |
| umls:C0033860 | Psoriasis | 2.175157 | 0.0466466 | 0.0441043 | 12/76 | 0.0011929 |
| umls:C0018213 | Graves Disease | 2.024518 | 0.0505829 | 0.0420883 | 9/76 | 0.0011929 |
| umls:C1333064 | Classical Hodgkin’s Lymphoma | 2.274196 | 0.0557943 | 0.0509690 | 6/76 | 0.0013276 |
| umls:C0037299 | Skin Ulcer | 2.235847 | 0.0607327 | 0.0571681 | 4/76 | 0.0014358 |
| umls:C0024305 | Lymphoma, Non-Hodgkin | 2.290440 | 0.0475144 | 0.0746457 | 11/76 | 0.0016934 |
| umls:C0017658 | Glomerulonephritis | 2.501363 | 0.0534931 | 0.0803000 | 7/76 | 0.0017647 |
Enrichments for Module #00BADE (MTcor=-0.9):
GSEA has 19 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods
Enrichments for Module #D177FF (MTcor=-0.91):
GSEA has 38 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_GB.UTF-8 LC_COLLATE=en_GB.UTF-8
## [5] LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8
## [7] LC_PAPER=en_GB.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] WGCNA_1.69 fastcluster_1.1.25 dynamicTreeCut_1.63-1
## [4] org.Hs.eg.db_3.8.2 AnnotationDbi_1.46.1 IRanges_2.18.3
## [7] S4Vectors_0.22.1 Biobase_2.44.0 BiocGenerics_0.30.0
## [10] DOSE_3.10.2 ReactomePA_1.28.0 clusterProfiler_3.12.0
## [13] biomaRt_2.40.5 kableExtra_1.1.0 knitr_1.28
## [16] doParallel_1.0.15 iterators_1.0.12 foreach_1.5.0
## [19] polycor_0.7-10 expss_0.10.2 ggpubr_0.2.5
## [22] magrittr_1.5 GGally_1.5.0 gridExtra_2.3
## [25] viridis_0.5.1 viridisLite_0.3.0 RColorBrewer_1.1-2
## [28] dendextend_1.13.4 plotly_4.9.2 glue_1.4.1
## [31] reshape2_1.4.4 forcats_0.5.0 stringr_1.4.0
## [34] dplyr_1.0.0 purrr_0.3.4 readr_1.3.1
## [37] tidyr_1.1.0 tibble_3.0.1 ggplot2_3.3.2
## [40] tidyverse_1.3.0
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.1.0 RSQLite_2.2.0
## [3] htmlwidgets_1.5.1 grid_3.6.3
## [5] BiocParallel_1.18.1 munsell_0.5.0
## [7] codetools_0.2-16 preprocessCore_1.46.0
## [9] miniUI_0.1.1.1 withr_2.2.0
## [11] colorspace_1.4-1 GOSemSim_2.10.0
## [13] highr_0.8 rstudioapi_0.11
## [15] ggsignif_0.6.0 labeling_0.3
## [17] urltools_1.7.3 GenomeInfoDbData_1.2.1
## [19] polyclip_1.10-0 bit64_0.9-7
## [21] farver_2.0.3 vctrs_0.3.1
## [23] generics_0.0.2 xfun_0.12
## [25] R6_2.4.1 GenomeInfoDb_1.20.0
## [27] graphlayouts_0.7.0 locfit_1.5-9.4
## [29] DelayedArray_0.10.0 bitops_1.0-6
## [31] reshape_0.8.8 fgsea_1.10.1
## [33] gridGraphics_0.5-0 assertthat_0.2.1
## [35] promises_1.1.0 scales_1.1.1
## [37] ggraph_2.0.3 nnet_7.3-14
## [39] enrichplot_1.4.0 ggExtra_0.9
## [41] gtable_0.3.0 tidygraph_1.2.0
## [43] rlang_0.4.6 genefilter_1.66.0
## [45] splines_3.6.3 lazyeval_0.2.2
## [47] acepack_1.4.1 impute_1.58.0
## [49] broom_0.5.5 europepmc_0.4
## [51] checkmate_2.0.0 BiocManager_1.30.10
## [53] yaml_2.2.1 modelr_0.1.6
## [55] crosstalk_1.1.0.1 backports_1.1.8
## [57] httpuv_1.5.2 qvalue_2.16.0
## [59] Hmisc_4.4-0 tools_3.6.3
## [61] ggplotify_0.0.5 ellipsis_0.3.1
## [63] ggridges_0.5.2 Rcpp_1.0.4.6
## [65] plyr_1.8.6 base64enc_0.1-3
## [67] progress_1.2.2 zlibbioc_1.30.0
## [69] RCurl_1.98-1.2 prettyunits_1.1.1
## [71] rpart_4.1-15 cowplot_1.0.0
## [73] SummarizedExperiment_1.14.1 haven_2.2.0
## [75] ggrepel_0.8.2 cluster_2.1.0
## [77] fs_1.4.0 data.table_1.12.8
## [79] DO.db_2.9 triebeard_0.3.0
## [81] reprex_0.3.0 reactome.db_1.68.0
## [83] matrixStats_0.56.0 mime_0.9
## [85] xtable_1.8-4 hms_0.5.3
## [87] evaluate_0.14 XML_3.99-0.3
## [89] jpeg_0.1-8.1 readxl_1.3.1
## [91] compiler_3.6.3 crayon_1.3.4
## [93] htmltools_0.4.0 later_1.0.0
## [95] Formula_1.2-3 geneplotter_1.62.0
## [97] lubridate_1.7.4 DBI_1.1.0
## [99] tweenr_1.0.1 dbplyr_1.4.2
## [101] MASS_7.3-51.6 rappdirs_0.3.1
## [103] Matrix_1.2-18 cli_2.0.2
## [105] igraph_1.2.5 GenomicRanges_1.36.1
## [107] pkgconfig_2.0.3 rvcheck_0.1.8
## [109] foreign_0.8-76 xml2_1.2.5
## [111] annotate_1.62.0 webshot_0.5.2
## [113] XVector_0.24.0 rvest_0.3.5
## [115] digest_0.6.25 graph_1.62.0
## [117] rmarkdown_2.1 cellranger_1.1.0
## [119] fastmatch_1.1-0 htmlTable_1.13.3
## [121] curl_4.3 shiny_1.4.0.2
## [123] graphite_1.30.0 lifecycle_0.2.0
## [125] nlme_3.1-147 jsonlite_1.7.0
## [127] fansi_0.4.1 pillar_1.4.4
## [129] lattice_0.20-41 fastmap_1.0.1
## [131] httr_1.4.1 survival_3.1-12
## [133] GO.db_3.8.2 UpSetR_1.4.0
## [135] png_0.1-7 bit_1.1-15.2
## [137] ggforce_0.3.1 stringi_1.4.6
## [139] blob_1.2.1 DESeq2_1.24.0
## [141] latticeExtra_0.6-29 memoise_1.1.0